{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 12,
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from xgboost import XGBRFRegressor as XGBR\n",
    "import xgboost as XGB\n",
    "from numpy import *\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn.linear_model import LinearRegression as LinearRegression\n",
    "from sklearn.model_selection import KFold,cross_val_score as CVS,train_test_split as TTS\n",
    "from sklearn.metrics import mean_squared_error as MSE\n",
    "wine = pd.read_excel(\"RF_lgb_Relation.xlsx\",header=0)\n",
    "wine0 = pd.read_csv(\"after_process_database.csv\",header=0)\n",
    "feature20 = pd.read_csv(\"final_features.csv\",header=0)\n",
    "feature_20=feature20.loc[:,'name'].values\n",
    "after_feature=wine.loc[:49,'Rela删除后的RF'].values\n",
    "index50 = wine0.loc[:,after_feature].values\n",
    "index20 = wine0.loc[:,feature_20].values\n",
    "yList = wine0.pIC50.values\n",
    "index20"
   ],
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "array([[28.27793195,  0.46912557, -0.12645316, ...,  1.35723722,\n",
       "         0.73317565, -1.3313031 ],\n",
       "       [31.07674804,  0.44912557, -0.12645324, ...,  1.36403994,\n",
       "         0.73317565, -1.3786282 ],\n",
       "       [30.90106359,  0.48071295, -0.14312582, ...,  1.20091777,\n",
       "         0.83955604, -1.60835379],\n",
       "       ...,\n",
       "       [44.46643496,  0.50975105,  0.05139418, ...,  0.83563916,\n",
       "         0.95805898, -5.99302785],\n",
       "       [32.17931565,  0.52926898,  0.06039729, ...,  0.87127637,\n",
       "         0.93249605, -5.77914217],\n",
       "       [44.46643496,  0.5052159 ,  0.0600449 , ...,  0.89578922,\n",
       "         0.95805898, -5.97813907]])"
      ]
     },
     "metadata": {},
     "execution_count": 12
    }
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "source": [
    "X_train,X_test,y_train,y_test = TTS(index20,yList,test_size=0.3)#20维度数据\n",
    "X_train.shape"
   ],
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "(1381, 20)"
      ]
     },
     "metadata": {},
     "execution_count": 13
    }
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "source": [
    "# from sklearn.model_selection import learning_curve\n",
    "# def plot(estimator,title,x,y,ax=None,ylim = None,cv=None,):\n",
    "#     train_sizes,train_scores,test_scores = learning_curve(estimator,x,y,shuffle=True,cv=cv)\n",
    "#     if ax == None:\n",
    "#         ax = plt.gca()\n",
    "#     else :\n",
    "#         ax = plt.figure()\n",
    "#     ax.set_title(title)\n",
    "#     if ylim is not None:\n",
    "#         ax.set_ylim(*ylim)\n",
    "#     ax.set_xlabel('training example')\n",
    "#     ax.set_ylabel('Score')\n",
    "#     ax.grid()\n",
    "#     ax.plot(train_sizes,np.mean(train_scores,axis=1),'o-',color = 'r',label = 'Training score')\n",
    "#     ax.plot(train_sizes,np.mean(test_scores,axis=1),'o-',color = 'r',label = 'Testing score')\n",
    "#     ax.legend(loc = 'best')\n",
    "#     return ax\n",
    "# cv = KFold(n_splits=5,shuffle=True)\n"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "source": [
    "# plot(XGBR(n_estimator=130),'XGB',X_train,y_train)\n",
    "# plt.show()"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "source": [
    "# reg= XGBR(n_estimator = 100).fit(X_train,y_train)#100ge 树"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "source": [
    "# reg.predict(X_test)"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "source": [
    "# reg.score(X_test,y_test)"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "source": [
    "# MSE(y_test,reg.predict(X_test))"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "source": [
    "# reg.feature_importances_"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "source": [
    "#交叉验证，导入没有训练的模型\n",
    "reg1= XGBR(n_estimator = 1).fit(index20,yList)\n",
    "# CVS(reg1,X_train,y_train,cv=5)#只在训练集上，更严谨\n"
   ],
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "[01:58:20] WARNING: ../src/learner.cc:573: \n",
      "Parameters: { \"n_estimator\" } might not be used.\n",
      "\n",
      "  This may not be accurate due to some parameters are only used in language bindings but\n",
      "  passed down to XGBoost core.  Or some parameters are not used but slip through this\n",
      "  verification. Please open an issue if you find above cases.\n",
      "\n",
      "\n"
     ]
    }
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "source": [
    "# MSE=CVS(reg1,index50,yList,cv=0,scoring='neg_mean_squared_error')#只在训练集上，更严谨\n"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "source": [
    "XGB.to_graphviz(reg1, num_trees=2)"
   ],
   "outputs": [
    {
     "output_type": "error",
     "ename": "ExecutableNotFound",
     "evalue": "failed to execute 'dot', make sure the Graphviz executables are on your systems' PATH",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)",
      "\u001b[0;32m~/anaconda3/envs/deeplearning/lib/python3.7/site-packages/graphviz/backend.py\u001b[0m in \u001b[0;36mrun\u001b[0;34m(cmd, input, capture_output, check, encoding, quiet, **kwargs)\u001b[0m\n\u001b[1;32m    169\u001b[0m     \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 170\u001b[0;31m         \u001b[0mproc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msubprocess\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mPopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcmd\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstartupinfo\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mget_startupinfo\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    171\u001b[0m     \u001b[0;32mexcept\u001b[0m \u001b[0mOSError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/anaconda3/envs/deeplearning/lib/python3.7/subprocess.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, args, bufsize, executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env, universal_newlines, startupinfo, creationflags, restore_signals, start_new_session, pass_fds, encoding, errors, text)\u001b[0m\n\u001b[1;32m    799\u001b[0m                                 \u001b[0merrread\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrwrite\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 800\u001b[0;31m                                 restore_signals, start_new_session)\n\u001b[0m\u001b[1;32m    801\u001b[0m         \u001b[0;32mexcept\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/anaconda3/envs/deeplearning/lib/python3.7/subprocess.py\u001b[0m in \u001b[0;36m_execute_child\u001b[0;34m(self, args, executable, preexec_fn, close_fds, pass_fds, cwd, env, startupinfo, creationflags, shell, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite, restore_signals, start_new_session)\u001b[0m\n\u001b[1;32m   1550\u001b[0m                             \u001b[0merr_msg\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;34m': '\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mrepr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0merr_filename\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1551\u001b[0;31m                     \u001b[0;32mraise\u001b[0m \u001b[0mchild_exception_type\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0merrno_num\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merr_msg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merr_filename\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1552\u001b[0m                 \u001b[0;32mraise\u001b[0m \u001b[0mchild_exception_type\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0merr_msg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'dot': 'dot'",
      "\nThe above exception was the direct cause of the following exception:\n",
      "\u001b[0;31mExecutableNotFound\u001b[0m                        Traceback (most recent call last)",
      "\u001b[0;32m~/anaconda3/envs/deeplearning/lib/python3.7/site-packages/IPython/core/formatters.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, obj)\u001b[0m\n\u001b[1;32m    343\u001b[0m             \u001b[0mmethod\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mget_real_method\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprint_method\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    344\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mmethod\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 345\u001b[0;31m                 \u001b[0;32mreturn\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    346\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    347\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/anaconda3/envs/deeplearning/lib/python3.7/site-packages/graphviz/files.py\u001b[0m in \u001b[0;36m_repr_svg_\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    139\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    140\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m_repr_svg_\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 141\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpipe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'svg'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdecode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_encoding\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    142\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    143\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mpipe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mformat\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrenderer\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mformatter\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mquiet\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/anaconda3/envs/deeplearning/lib/python3.7/site-packages/graphviz/files.py\u001b[0m in \u001b[0;36mpipe\u001b[0;34m(self, format, renderer, formatter, quiet)\u001b[0m\n\u001b[1;32m    166\u001b[0m         out = backend.pipe(self._engine, format, data,\n\u001b[1;32m    167\u001b[0m                            \u001b[0mrenderer\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mrenderer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mformatter\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mformatter\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 168\u001b[0;31m                            quiet=quiet)\n\u001b[0m\u001b[1;32m    169\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    170\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mout\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/anaconda3/envs/deeplearning/lib/python3.7/site-packages/graphviz/backend.py\u001b[0m in \u001b[0;36mpipe\u001b[0;34m(engine, format, data, renderer, formatter, quiet)\u001b[0m\n\u001b[1;32m    266\u001b[0m     \"\"\"\n\u001b[1;32m    267\u001b[0m     \u001b[0mcmd\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcommand\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mengine\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mformat\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrenderer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mformatter\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 268\u001b[0;31m     \u001b[0mout\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrun\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcmd\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcapture_output\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcheck\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mquiet\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mquiet\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    269\u001b[0m     \u001b[0;32mreturn\u001b[0m \u001b[0mout\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    270\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/anaconda3/envs/deeplearning/lib/python3.7/site-packages/graphviz/backend.py\u001b[0m in \u001b[0;36mrun\u001b[0;34m(cmd, input, capture_output, check, encoding, quiet, **kwargs)\u001b[0m\n\u001b[1;32m    171\u001b[0m     \u001b[0;32mexcept\u001b[0m \u001b[0mOSError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    172\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0merrno\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0merrno\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mENOENT\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 173\u001b[0;31m             \u001b[0;32mraise\u001b[0m \u001b[0mExecutableNotFound\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcmd\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    174\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    175\u001b[0m             \u001b[0;32mraise\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mExecutableNotFound\u001b[0m: failed to execute 'dot', make sure the Graphviz executables are on your systems' PATH"
     ]
    },
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "<graphviz.files.Source at 0x7fac8cd6e650>"
      ]
     },
     "metadata": {},
     "execution_count": 24
    }
   ],
   "metadata": {}
  }
 ],
 "metadata": {
  "orig_nbformat": 4,
  "language_info": {
   "name": "python",
   "version": "3.7.10",
   "mimetype": "text/x-python",
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "pygments_lexer": "ipython3",
   "nbconvert_exporter": "python",
   "file_extension": ".py"
  },
  "kernelspec": {
   "name": "python3",
   "display_name": "Python 3.7.10 64-bit ('deeplearning': conda)"
  },
  "interpreter": {
   "hash": "a2441be928683876ab9582b799059e1f9f764e7f9dd2044f4d30ad5ffc1df7fc"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}